library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the joined Portal survey records; readr guesses the column types
# and reports the specification it settled on.
surveys_complete <- read_csv(file = "data/portal_data_joined.csv")
## Parsed with column specification:
## cols(
## record_id = col_double(),
## month = col_double(),
## day = col_double(),
## year = col_double(),
## plot_id = col_double(),
## species_id = col_character(),
## sex = col_character(),
## hindfoot_length = col_double(),
## weight = col_double(),
## genus = col_character(),
## species = col_character(),
## taxa = col_character(),
## plot_type = col_character()
## )
# Scatter plot of hindfoot length against weight, one point per record.
ggplot(surveys_complete, mapping = aes(weight, hindfoot_length)) +
  geom_point()
## Warning: Removed 4048 rows containing missing values (geom_point).
# Store the data and the axis mappings once so that different geoms can be
# layered onto the same base plot below.
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)

# Render the base plot as a scatter plot.
surveys_plot + geom_point()
## Warning: Removed 4048 rows containing missing values (geom_point).
# Attach hexbin before drawing the hexagonal-bin layer.
library(hexbin)

# Same base plot, with observations binned into hexagons instead of drawn
# as individual points.
surveys_plot + geom_hex()
## Warning: Removed 4048 rows containing non-finite values (stat_binhex).
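A hexagonal bin plot is more useful than a scatter plot when there is a high density of points in an area, because it shows how many observations fall in each region instead of drawing them on top of one another. On the other hand, if there are fewer points, a scatter plot is more precise because it shows each individual observation.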
# Scatter plot with mostly transparent points, so dense regions show up as
# darker areas.
ggplot(surveys_complete, mapping = aes(weight, hindfoot_length)) +
  geom_point(alpha = 0.1)
## Warning: Removed 4048 rows containing missing values (geom_point).
# Transparent scatter plot with every point drawn in the same fixed colour
# (set outside aes(), so it is not mapped to data).
ggplot(surveys_complete, mapping = aes(weight, hindfoot_length)) +
  geom_point(colour = "blue", alpha = 0.1)
## Warning: Removed 4048 rows containing missing values (geom_point).
# Transparent scatter plot with point colour mapped to the species code.
ggplot(surveys_complete, mapping = aes(weight, hindfoot_length)) +
  geom_point(mapping = aes(colour = species_id), alpha = 0.1)
## Warning: Removed 4048 rows containing missing values (geom_point).
# Inspect the structure of the data frame: the type and first few values of
# every column.
str(surveys_complete)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 34786 obs. of 13 variables:
## $ record_id : num 1 72 224 266 349 363 435 506 588 661 ...
## $ month : num 7 8 9 10 11 11 12 1 2 3 ...
## $ day : num 16 19 13 16 12 12 10 8 18 11 ...
## $ year : num 1977 1977 1977 1977 1977 ...
## $ plot_id : num 2 2 2 2 2 2 2 2 2 2 ...
## $ species_id : chr "NL" "NL" "NL" "NL" ...
## $ sex : chr "M" "M" NA NA ...
## $ hindfoot_length: num 32 31 NA NA NA NA NA NA NA NA ...
## $ weight : num NA NA NA NA NA NA NA NA 218 NA ...
## $ genus : chr "Neotoma" "Neotoma" "Neotoma" "Neotoma" ...
## $ species : chr "albigula" "albigula" "albigula" "albigula" ...
## $ taxa : chr "Rodent" "Rodent" "Rodent" "Rodent" ...
## $ plot_type : chr "Control" "Control" "Control" "Control" ...
## - attr(*, "spec")=
## .. cols(
## .. record_id = col_double(),
## .. month = col_double(),
## .. day = col_double(),
## .. year = col_double(),
## .. plot_id = col_double(),
## .. species_id = col_character(),
## .. sex = col_character(),
## .. hindfoot_length = col_double(),
## .. weight = col_double(),
## .. genus = col_character(),
## .. species = col_character(),
## .. taxa = col_character(),
## .. plot_type = col_character()
## .. )
# Transparent scatter plot with point colour mapped to the plot type.
ggplot(surveys_complete, mapping = aes(weight, hindfoot_length)) +
  geom_point(mapping = aes(colour = plot_type), alpha = 0.1)
## Warning: Removed 4048 rows containing missing values (geom_point).
This is not a good way to show this type of data because the points of different colors are too intermixed, and too small, to contribute any information to the graph.
# Distribution of weight within each species, one box per species code.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot()
## Warning: Removed 2503 rows containing non-finite values (stat_boxplot).
# Boxplot with the raw observations jittered on top of it. Layers are drawn
# in the order they are added, so the points end up over the boxes.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3)
## Warning: Removed 2503 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2503 rows containing missing values (geom_point).
# Same plot with the layer order swapped: the jittered points are drawn
# first and the transparent boxes are drawn over them.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_jitter(colour = "tomato", alpha = 0.3) +
  geom_boxplot(alpha = 0)
## Warning: Removed 2503 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2503 rows containing missing values (geom_point).
# Violin plot of weight per species, drawn over the jittered raw points.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_jitter(colour = "tomato", alpha = 0.3) +
  geom_violin()
## Warning: Removed 2503 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2503 rows containing missing values (geom_point).
# Violin plot over jittered points, with weight shown on a log10 y axis.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_jitter(colour = "tomato", alpha = 0.3) +
  geom_violin() +
  scale_y_log10()
## Warning: Removed 2503 rows containing non-finite values (stat_ydensity).
## Warning: Removed 2503 rows containing missing values (geom_point).
# Hindfoot length per species: jittered raw observations coloured by the
# plot they were recorded in, with transparent boxplots drawn over them.
#
# plot_id is read in as a number, so mapping it directly to colour yields a
# continuous gradient. It identifies a plot rather than measuring a
# quantity, so it is converted to a factor to get one discrete colour per
# plot. labs() keeps the legend title as "plot_id" instead of the
# expression text.
ggplot(data = surveys_complete, mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.3, aes(color = factor(plot_id))) +
  geom_boxplot(alpha = 0) +
  labs(color = "plot_id")
## Warning: Removed 3348 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3348 rows containing missing values (geom_point).